`%>%` <- magrittr::`%>%`

Definition of delta statistic

# Delta statistic for two samples.
# Pools `x` and `y` into one vector, tags each value with its sample of
# origin ("X" or "Y"), hands both to rocauc::auc_by() with "Y" as the third
# argument, and subtracts 0.5 from whatever that call returns.
stat_auc <- function(x, y) {
  pooled <- c(x, y)
  origin <- rep(c("X", "Y"), times = c(length(x), length(y)))
  rocauc::auc_by(pooled, origin, "Y") - 0.5
}
# Apply a two-argument statistic to one named element of two containers.
# `dx[[var]]` is passed as the first argument of `stat` and `dy[[var]]` as
# the second; the value of `stat` is returned unchanged.
apply_stat <- function(dx, dy, var, stat) {
  stat(dx[[var]], dy[[var]])
}

Plot of English statistics for segments attested with frequency >= 5

## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Number of potential consonants by language

# Per-language totals: drop the labels, freq and scores columns from
# `added_consonants`, sum every remaining column, and store the totals in a
# two-column tibble (`nsegs` = column sum, `language` = column name).
# select() replaces the superseded select_at()/vars() pair, and colSums()
# replaces apply(2, sum), which coerced the data frame to a matrix first.
# NOTE(review): colSums() requires the remaining columns to be numeric and
# always returns doubles — confirm the per-language columns are numeric.
ncons_by_lang <- added_consonants %>%
  dplyr::select(-labels, -freq, -scores) %>%
  colSums() %>%
  (function(x) tibble::tibble(nsegs = x, language = names(x)))

Top N languages

# Print the N_LANG rows of `ncons_by_lang` with the largest `nsegs`.
N_LANG <- 10
ncons_by_lang %>%
  dplyr::arrange(dplyr::desc(nsegs)) %>%
  head(n = N_LANG) %>%
  print()
## # A tibble: 10 x 2
##    nsegs language
##    <dbl> <chr>   
##  1    39 uby     
##  2    32 ady     
##  3    29 gdo     
##  4    28 kbd     
##  5    28 lez     
##  6    28 tkr     
##  7    27 ven     
##  8    26 nbl     
##  9    25 mrt     
## 10    24 ibi
## [[1]]
##             stat_econ    stat_loc  stat_glob
## stat_econ  1.00000000 -0.01026796 -0.4808708
## stat_loc  -0.01026796  1.00000000  0.2951242
## stat_glob -0.48087075  0.29512423  1.0000000
## 
## [[2]]
##             stat_econ   stat_loc  stat_glob
## stat_econ  1.00000000 0.06164304 -0.4973912
## stat_loc   0.06164304 1.00000000  0.3384304
## stat_glob -0.49739123 0.33843036  1.0000000
## 
## [[3]]
##             stat_econ    stat_loc  stat_glob
## stat_econ  1.00000000 -0.08309264 -0.3556031
## stat_loc  -0.08309264  1.00000000 -0.1521580
## stat_glob -0.35560314 -0.15215796  1.0000000
## 
## [[4]]
##            stat_econ   stat_loc   stat_glob
## stat_econ  1.0000000 0.15086081 -0.44944164
## stat_loc   0.1508608 1.00000000  0.03661718
## stat_glob -0.4494416 0.03661718  1.00000000
## 
## [[5]]
##            stat_econ    stat_loc   stat_glob
## stat_econ  1.0000000  0.19540347 -0.41207608
## stat_loc   0.1954035  1.00000000 -0.05907608
## stat_glob -0.4120761 -0.05907608  1.00000000
## 
## [[6]]
##            stat_econ  stat_loc  stat_glob
## stat_econ  1.0000000 0.2964218 -0.0524047
## stat_loc   0.2964218 1.0000000  0.1260489
## stat_glob -0.0524047 0.1260489  1.0000000
## 
## [[7]]
##            stat_econ    stat_loc   stat_glob
## stat_econ  1.0000000  0.14095463 -0.28296890
## stat_loc   0.1409546  1.00000000 -0.05849594
## stat_glob -0.2829689 -0.05849594  1.00000000
## 
## [[8]]
##            stat_econ   stat_loc   stat_glob
## stat_econ  1.0000000 0.21024927 -0.27856790
## stat_loc   0.2102493 1.00000000  0.03457613
## stat_glob -0.2785679 0.03457613  1.00000000
## 
## [[9]]
##            stat_econ   stat_loc  stat_glob
## stat_econ  1.0000000  0.3340491 -0.5291890
## stat_loc   0.3340491  1.0000000 -0.1597613
## stat_glob -0.5291890 -0.1597613  1.0000000
## 
## [[10]]
##            stat_econ   stat_loc  stat_glob
## stat_econ  1.0000000 -0.1144513 -0.3864826
## stat_loc  -0.1144513  1.0000000  0.3685129
## stat_glob -0.3864826  0.3685129  1.0000000

Merge the five “common” languages that would work OK (Hindi, Malayalam, Venda, Ndebele, and Kabardian)

# 3D scatter of stat_econ / stat_loc / stat_glob for rows of `stats` flagged
# (== 1) in any of hin, mal, ven, nbl or kbd. Points carry their `labels`
# as text, colour is mapped to log(freq), and a marker trace is added.
dplyr::filter(
  stats,
  hin == 1 | mal == 1 | ven == 1 | nbl == 1 | kbd == 1
) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Hindi by itself

# 3D scatter of stat_econ / stat_loc / stat_glob for rows of `stats` where
# hin == 1. Points carry their `labels` as text, colour is mapped to
# log(freq), and a marker trace is added.
dplyr::filter(stats, hin == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays
# Dump the `labels` column of `stats` to labels.csv in the working directory.
write.csv(stats[["labels"]], file = "labels.csv")

Kabardian by itself

# 3D scatter of stat_econ / stat_loc / stat_glob for rows of `stats` where
# kbd == 1. Points carry their `labels` as text, colour is mapped to
# log(freq), and a marker trace is added.
dplyr::filter(stats, kbd == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Kabardian prime by itself

# 3D scatter of stat_econ / stat_loc / stat_glob for rows of `stats` where
# kbd_prime == 1. Points carry their `labels` as text, colour is mapped to
# log(freq), and a marker trace is added.
dplyr::filter(stats, kbd_prime == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Kabardian prime and Hindi

# 3D scatter of stat_econ / stat_loc / stat_glob for rows of `stats` where
# kbd_prime == 1 or hin == 1. Points carry their `labels` as text, colour is
# mapped to log(freq), and a marker trace is added.
dplyr::filter(stats, kbd_prime == 1 | hin == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Hindi and Kabardian

# 3D scatter of stat_econ / stat_loc / stat_glob for rows of `stats` where
# hin == 1 or kbd == 1. Points carry their `labels` as text, colour is
# mapped to log(freq), and a marker trace is added.
dplyr::filter(stats, hin == 1 | kbd == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Hindi and Malayalam

# 3D scatter of stat_econ / stat_loc / stat_glob for rows of `stats` where
# hin == 1 or mal == 1. Points carry their `labels` as text, colour is
# mapped to log(freq), and a marker trace is added.
dplyr::filter(stats, hin == 1 | mal == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays

Hindi, Malayalam, and Georgian

# 3D scatter of stat_econ / stat_loc / stat_glob for rows of `stats` where
# hin == 1, kat == 1 or mal == 1. Points carry their `labels` as text,
# colour is mapped to log(freq), and a marker trace is added.
dplyr::filter(stats, hin == 1 | kat == 1 | mal == 1) %>%
  plotly::plot_ly(
    x = ~stat_econ,
    y = ~stat_loc,
    z = ~stat_glob,
    text = ~labels,
    color = ~log(freq),
    type = "scatter3d",
    mode = "text"
  ) %>%
  plotly::add_markers()
## Warning: textfont.color doesn't (yet) support data arrays

## Warning: textfont.color doesn't (yet) support data arrays